{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"machine_shape":"hm","gpuType":"A100","authorship_tag":"ABX9TyN7B/DyTicnoD1NYMMmOFb2"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU","gpuClass":"standard","widgets":{"application/vnd.jupyter.widget-state+json":{"481107966aab4156a59431683e0df7b6":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_dbe1cb37650b484a9fcfaa348ab50a43","IPY_MODEL_9495e17f703b4215b59ded17a7b9ab74","IPY_MODEL_25baacb633ed43aca8d4a8c8e4a9916b"],"layout":"IPY_MODEL_e723c6f2ed854b40804ac5ab4b65f1d7"}},"dbe1cb37650b484a9fcfaa348ab50a43":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3673056c227f4073bf4344a7a01a7ed6","placeholder":"​","style":"IPY_MODEL_81f8775d6464467e88c63e8a21a31e80","value":"Map: 100%"}},"9495e17f703b4215b59ded17a7b9ab74":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"","description":"","description_tooltip":null,"layout":"IPY_MODEL_74fcc9d14c1d41a48b20176a52cc13ca","max":400,"min":0,"orientation":"horizontal","style":"IPY_MODEL_242eaeab548e4dc5a2c03950e604e627","value":400}},"25baacb633ed43aca8d4a8c8e4a9916b":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_43295c865e11477384102d802e8976e5","placeholder":"​","style":"IPY_MODEL_e7b5570d2ee94b318723c26916e9e9ec","value":" 400/400 [00:00&lt;00:00, 3200.94 examples/s]"}},"e723c6f2ed854b40804ac5ab4b65f1d7":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":"hidden","width":null}},"3673056c227f4073bf4344a7a01a7ed6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"81f8775d6464467e88c63e8a21a31e80":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"74fcc9d14c1d41a48b20176a52cc13ca":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"242eaeab548e4dc5a2c03950e604e627":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"43295c865e11477384102d802e8976e5":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e7b5570d2ee94b318723c26916e9e9ec":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"26878d2d98844aae9fe193e6657a1356":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_0b27c869b2fd4c7697c3b538b3572d47","IPY_MODEL_88e27d88192e42879e1ccaf5374cabf3","IPY_MODEL_cb6e7d847c9047d1ba62630250c4073e"],"layout":"IPY_MODEL_d299b2d31cc04fa4aec1755ae6cc1f75"}},"0b27c869b2fd4c7697c3b538b3572d47":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_daa952c83ff643fba859e9f4f6f7ccc6","placeholder":"​","style":"IPY_MODEL_09e144fa726c4720b28048395e06f2a1","value":"Map:   0%"}},"88e27d88192e42879e1ccaf5374cabf3":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"","description":"","description_tooltip":null,"layout":"IPY_MODEL_64881cdc4f254b96b87d1116bb571f46","max":150,"min":0,"orientation":"horizontal","style":"IPY_MODEL_3cc12c445d0e48488f619d84e9f5d135","value":150}},"cb6e7d847c9047d1ba62630250c4073e":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d27a2d3470b64cf9b3229db4d4360ba2","placeholder":"​","style":"IPY_MODEL_ec6114f0321a4f43a31dbe9f9c0a42b9","value":" 0/150 [00:00&lt;?, ? examples/s]"}},"d299b2d31cc04fa4aec1755ae6cc1f75":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":"hidden","width":null}},"daa952c83ff643fba859e9f4f6f7ccc6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"09e144fa726c4720b28048395e06f2a1":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"64881cdc4f254b96b87d1116bb571f46":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3cc12c445d0e48488f619d84e9f5d135":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"d27a2d3470b64cf9b3229db4d4360ba2":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ec6114f0321a4f43a31dbe9f9c0a42b9":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9e8c01bebd1143fcbed9497b3b408d0c":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_fcbb91e4bd204c839a2be730663b401b","IPY_MODEL_493a7dbd0d48411687f7c027bc735995","IPY_MODEL_8e3d180feb564c8384a18c411e0ac8b4"],"layout":"IPY_MODEL_4e4fab46b35f4107ae855b46e0ebdbd1"}},"fcbb91e4bd204c839a2be730663b401b":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3144d2eb23744f56a8e623c68568134e","placeholder":"​","style":"IPY_MODEL_3c8daa8ea9ce4ab0a2b17ea225ecfa2b","value":"Loading checkpoint shards: 100%"}},"493a7dbd0d48411687f7c027bc735995":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_49aeecf9231e421eb7e7a57ac6f99326","max":2,"min":0,"orientation":"horizontal","style":"IPY_MODEL_016230e67725421498e1658f898d2802","value":2}},"8e3d180feb564c8384a18c411e0ac8b4":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_544a7e6c6e894431ad441fed9963d5fd","placeholder":"​","style":"IPY_MODEL_2271d6d9d7fa4267aaadd9286c2ee8f2","value":" 2/2 [00:06&lt;00:00,  2.83s/it]"}},"4e4fab46b35f4107ae855b46e0ebdbd1":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3144d2eb23744f56a8e623c68568134e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3c8daa8ea9ce4ab0a2b17ea225ecfa2b":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"49aeecf9231e421eb7e7a57ac6f99326":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"016230e67725421498e1658f898d2802":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"544a7e6c6e894431ad441fed9963d5fd":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2271d6d9d7fa4267aaadd9286c2ee8f2":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"57015f35b7174035846b2de909a91231":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_dcdb3205db7f40dd89e7b30d5ee12c48","IPY_MODEL_414d623a720941c38d8a040f1f190d79","IPY_MODEL_1f9154f849ab496e9ad68097df48c8a7"],"layout":"IPY_MODEL_d1c912fdd4f341a5875fbe1f2e7f9ee7"}},"dcdb3205db7f40dd89e7b30d5ee12c48":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_74a593ceaafc4ed1a63c4a2b8aa2e52a","placeholder":"​","style":"IPY_MODEL_0b9db00634b3402184692d6e3eee5f66","value":"Loading checkpoint shards: 100%"}},"414d623a720941c38d8a040f1f190d79":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_8b60a8b20d2a425b938eac315384e566","max":2,"min":0,"orientation":"horizontal","style":"IPY_MODEL_889939a0ea3644afb5d82d6b230f3928","value":2}},"1f9154f849ab496e9ad68097df48c8a7":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_9951c651e8124717823560a17ebf50cb","placeholder":"​","style":"IPY_MODEL_ff74469f66764f89821cd7767c6ff8d0","value":" 2/2 [00:07&lt;00:00,  3.14s/it]"}},"d1c912fdd4f341a5875fbe1f2e7f9ee7":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"74a593ceaafc4ed1a63c4a2b8aa2e52a":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0b9db00634b3402184692d6e3eee5f66":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"8b60a8b20d2a425b938eac315384e566":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"889939a0ea3644afb5d82d6b230f3928":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"9951c651e8124717823560a17ebf50cb":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ff74469f66764f89821cd7767c6ff8d0":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"17c9881396b74ca6ae5090f2a194f69c":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_c0bdf2475aa447f3875ebc6f8a8a76a8","IPY_MODEL_f0624e09f92c439f91ae13dc9b5adf96","IPY_MODEL_384b8f71077e405a9f04b42bef8149c4"],"layout":"IPY_MODEL_015c86573d7e4a948d33575053cf0385"}},"c0bdf2475aa447f3875ebc6f8a8a76a8":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_31b65b30a33b4ca79b37d99e3329b0cc","placeholder":"​","style":"IPY_MODEL_389f5ec78b354785bd97538ac7f12bac","value":"100%"}},"f0624e09f92c439f91ae13dc9b5adf96":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_847385c97f6c4aee93f20630f18ed134","max":3,"min":0,"orientation":"horizontal","style":"IPY_MODEL_278da2b240834b1da66431fc315a4510","value":3}},"384b8f71077e405a9f04b42bef8149c4":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_068a82e35d394be3a8463e487e8d19a3","placeholder":"​","style":"IPY_MODEL_0b963b43ed7c45a58d616bd4743a0126","value":" 3/3 [00:00&lt;00:00, 176.84it/s]"}},"015c86573d7e4a948d33575053cf0385":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"31b65b30a33b4ca79b37d99e3329b0cc":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"389f5ec78b354785bd97538ac7f12bac":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"847385c97f6c4aee93f20630f18ed134":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"278da2b240834b1da66431fc315a4510":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"068a82e35d394be3a8463e487e8d19a3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0b963b43ed7c45a58d616bd4743a0126":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"8a159fbe557442fd88c90fab4ac89a73":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e01b9ea70d1c47309049add36cd51f23","IPY_MODEL_7c141986b4124973be44466e631847c5","IPY_MODEL_be48a134036c4a23994f62ed7aec397f"],"layout":"IPY_MODEL_0eeb221d7e0b42c695fb70f9b351e8e6"}},"e01b9ea70d1c47309049add36cd51f23":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_5e28df63028b426da963f70f33c560d6","placeholder":"​","style":"IPY_MODEL_f6394ad7c2e447a39031524bb7e8bee7","value":"Map: 100%"}},"7c141986b4124973be44466e631847c5":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"","description":"","description_tooltip":null,"layout":"IPY_MODEL_82d30fc6fc2e46ada32af5ddc808951b","max":400,"min":0,"orientation":"horizontal","style":"IPY_MODEL_4e1a6d0b34b144338eae1b4882a1f3aa","value":400}},"be48a134036c4a23994f62ed7aec397f":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_dff211d5707f41e0a202a04792c8cb43","placeholder":"​","style":"IPY_MODEL_f72483b283c546b7b71a29ade27a4e09","value":" 400/400 [00:00&lt;00:00, 3287.63 examples/s]"}},"0eeb221d7e0b42c695fb70f9b351e8e6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":"hidden","width":null}},"5e28df63028b426da963f70f33c560d6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f6394ad7c2e447a39031524bb7e8bee7":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"82d30fc6fc2e46ada32af5ddc808951b":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4e1a6d0b34b144338eae1b4882a1f3aa":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"dff211d5707f41e0a202a04792c8cb43":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f72483b283c546b7b71a29ade27a4e09":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c0aa0092cbd74be086ca4ab035b334dd":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_06c36a3b801e431bacc6f47a69c59968","IPY_MODEL_21b6933ac5eb4ec296f503e29ef93ebe","IPY_MODEL_4c5cca197ab24de7a30ed0b7801c9a04"],"layout":"IPY_MODEL_4e61dc0eeaab4d0bbb734d34c669dd00"}},"06c36a3b801e431bacc6f47a69c59968":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e0522898541148e58a145ecbb02e2431","placeholder":"​","style":"IPY_MODEL_c3d1fe7d3db3413cab3a22e0f41f653c","value":"Map:  98%"}},"21b6933ac5eb4ec296f503e29ef93ebe":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"","description":"","description_tooltip":null,"layout":"IPY_MODEL_a4837543fc0342629feb381d84d45deb","max":5099,"min":0,"orientation":"horizontal","style":"IPY_MODEL_0c079df05e3a4bc29cbd3e191f5d62b5","value":5099}},"4c5cca197ab24de7a30ed0b7801c9a04":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_8046a9c516744d328923a3efe4b08cd3","placeholder":"​","style":"IPY_MODEL_d896354ae62d4a2694842b0a69268025","value":" 5000/5099 [00:01&lt;00:00, 3979.36 examples/s]"}},"4e61dc0eeaab4d0bbb734d34c669dd00":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":"hidden","width":null}},"e0522898541148e58a145ecbb02e2431":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c3d1fe7d3db3413cab3a22e0f41f653c":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a4837543fc0342629feb381d84d45deb":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0c079df05e3a4bc29cbd3e191f5d62b5":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"8046a9c516744d328923a3efe4b08cd3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d896354ae62d4a2694842b0a69268025":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"cells":[{"cell_type":"markdown","source":["# Lora for Book\n"],"metadata":{"id":"IOQxm2USAdk_"}},{"cell_type":"code","source":["%%capture\n","pip install -q transformers peft datasets accelerate bitsandbytes"],"metadata":{"id":"zix9utFgAgmw","executionInfo":{"status":"ok","timestamp":1684862960919,"user_tz":-180,"elapsed":4609,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}}},"execution_count":42,"outputs":[]},{"cell_type":"code","source":["#LORA.py\n","#from tqdm import tqdm\n","import os, torch, pandas as pd  \n","from google.colab import drive\n","drive.mount(\"/content/drive\")\n","#os.chdir(\"/content/drive/MyDrive/DataScienceLab/LLM\")"],"metadata":{"id":"ExyXKcmdApCU","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1684862999115,"user_tz":-180,"elapsed":3742,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"2af8b09f-20f7-41a1-915d-28f522396964"},"execution_count":43,"outputs":[{"output_type":"stream","name":"stdout","text":["Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"]}]},{"cell_type":"code","source":["#model_names=[\"google/flan-t5-xxl\",\"google/flan-t5-xl\",\"google/flan-t5-large\"]\n","from transformers import AutoModelForSeq2SeqLM\n","model_names=[\"t5-base\",\"t5-large\",\"google/flan-t5-xl\"]\n","for model_name in model_names:\n","  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)\n","  model_8bit = AutoModelForSeq2SeqLM.from_pretrained(model_name, \n","                                                     load_in_8bit=True, \n","                                                     torch_dtype=torch.float16, \n","                                                     device_map='auto')\n","  m_size=model.get_memory_footprint()\n","  m_8bit_size=model_8bit.get_memory_footprint()\n","  print(f\"Model {model_name}: The original size is {m_size/2**30:.2f} GB, and 8bit:{m_8bit_size/2**30:.2f} GB\")\n","  del model\n","  del model_8bit"],"metadata":{"id":"mXV4FyldAuTQ","colab":{"base_uri":"https://localhost:8080/","height":136,"referenced_widgets":["9e8c01bebd1143fcbed9497b3b408d0c","fcbb91e4bd204c839a2be730663b401b","493a7dbd0d48411687f7c027bc735995","8e3d180feb564c8384a18c411e0ac8b4","4e4fab46b35f4107ae855b46e0ebdbd1","3144d2eb23744f56a8e623c68568134e","3c8daa8ea9ce4ab0a2b17ea225ecfa2b","49aeecf9231e421eb7e7a57ac6f99326","016230e67725421498e1658f898d2802","544a7e6c6e894431ad441fed9963d5fd","2271d6d9d7fa4267aaadd9286c2ee8f2","57015f35b7174035846b2de909a91231","dcdb3205db7f40dd89e7b30d5ee12c48","414d623a720941c38d8a040f1f190d79","1f9154f849ab496e9ad68097df48c8a7","d1c912fdd4f341a5875fbe1f2e7f9ee7","74a593ceaafc4ed1a63c4a2b8aa2e52a","0b9db00634b3402184692d6e3eee5f66","8b60a8b20d2a425b938eac315384e566","889939a0ea3644afb5d82d6b230f3928","9951c651e8124717823560a17ebf50cb","ff74469f66764f89821cd7767c6ff8d0"]},"executionInfo":{"status":"ok","timestamp":1684510221879,"user_tz":-180,"elapsed":55433,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"c0c37fcf-8fb8-41ae-f094-806e9553bf9f"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Model t5-base: The original size is 0.83 GB, and 8bit:0.39 GB\n","Model t5-large: The original size is 2.75 GB, and 8bit:1.28 GB\n"]},{"output_type":"display_data","data":{"text/plain":["Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"9e8c01bebd1143fcbed9497b3b408d0c"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"57015f35b7174035846b2de909a91231"}},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Model google/flan-t5-xl: The original size is 10.62 GB, and 8bit:4.18 GB\n"]}]},{"cell_type":"code","source":["model_name_or_path = \"t5-small\""],"metadata":{"id":"yYO0OlubFFXx","executionInfo":{"status":"ok","timestamp":1684863000135,"user_tz":-180,"elapsed":3,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}}},"execution_count":44,"outputs":[]},{"cell_type":"code","source":["# data preprocessing\n","from transformers import AutoTokenizer\n","tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)\n","\n","def preprocess_function(examples):\n","    inputs = examples[text_column]\n","    targets = examples[label_column]\n","    model_inputs = tokenizer(inputs, max_length=max_length, \n","                             padding=\"max_length\", \n","                             truncation=True, \n","                             return_tensors=\"pt\")\n","    labels = tokenizer(targets, \n","                       max_length=max_target_len, \n","                       padding=\"max_length\", \n","                       truncation=True, \n","                       return_tensors=\"pt\")\n","    labels = labels[\"input_ids\"]\n","    labels[labels == tokenizer.pad_token_id] = -100\n","    model_inputs[\"labels\"] = labels\n","    return model_inputs"],"metadata":{"id":"iaTLLk8pFH-Q","executionInfo":{"status":"ok","timestamp":1684863002996,"user_tz":-180,"elapsed":714,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}}},"execution_count":45,"outputs":[]},{"cell_type":"code","source":["from peft import get_peft_config, get_peft_model, get_peft_model_state_dict, LoraConfig, TaskType\n","from transformers import AutoConfig, AutoModelForSeq2SeqLM\n","import torch\n","\n","peft_config = LoraConfig(task_type=TaskType.SEQ_2_SEQ_LM, \n","                         inference_mode=False, \n","                         r=8, lora_alpha=32, lora_dropout=0.1)\n","model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path, \n","                                              load_in_8bit=True,\n","                                              torch_dtype=torch.float16, \n","                                              device_map='auto')\n","model = get_peft_model(model, peft_config)\n","model.print_trainable_parameters()"],"metadata":{"id":"_NdGmtdhFljA","executionInfo":{"status":"ok","timestamp":1684863012660,"user_tz":-180,"elapsed":1109,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"colab":{"base_uri":"https://localhost:8080/"},"outputId":"bd650600-ff9c-41ea-cd0d-49d2daaa9dad"},"execution_count":46,"outputs":[{"output_type":"stream","name":"stdout","text":["trainable params: 294912 || all params: 60801536 || trainable%: 0.4850403779272945\n"]}]},{"cell_type":"code","source":["from datasets import load_dataset, Dataset, DatasetDict\n","dataset = load_dataset(\"snli\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":67,"referenced_widgets":["17c9881396b74ca6ae5090f2a194f69c","c0bdf2475aa447f3875ebc6f8a8a76a8","f0624e09f92c439f91ae13dc9b5adf96","384b8f71077e405a9f04b42bef8149c4","015c86573d7e4a948d33575053cf0385","31b65b30a33b4ca79b37d99e3329b0cc","389f5ec78b354785bd97538ac7f12bac","847385c97f6c4aee93f20630f18ed134","278da2b240834b1da66431fc315a4510","068a82e35d394be3a8463e487e8d19a3","0b963b43ed7c45a58d616bd4743a0126"]},"id":"vS-FxA6cH4eB","executionInfo":{"status":"ok","timestamp":1684863016951,"user_tz":-180,"elapsed":1993,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"4f2d2dce-6f87-4743-bb56-8b703e3ee282"},"execution_count":47,"outputs":[{"output_type":"stream","name":"stderr","text":["WARNING:datasets.builder:Found cached dataset snli (/root/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b)\n"]},{"output_type":"display_data","data":{"text/plain":["  0%|          | 0/3 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"17c9881396b74ca6ae5090f2a194f69c"}},"metadata":{}}]},{"cell_type":"code","source":["import pandas as pd\n","snli_data=pd.DataFrame(dataset[\"train\"])\n","snli_data=snli_data.sample(frac=0.01)"],"metadata":{"id":"Agbm3PpuIVFF","executionInfo":{"status":"ok","timestamp":1684863034969,"user_tz":-180,"elapsed":17376,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}}},"execution_count":48,"outputs":[]},{"cell_type":"code","source":["snli_data.shape"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Dw5dPgcCI1e2","executionInfo":{"status":"ok","timestamp":1684863034969,"user_tz":-180,"elapsed":47,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"2b2fcd18-6ffe-48b0-d139-67fa6167c136"},"execution_count":49,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(5502, 3)"]},"metadata":{},"execution_count":49}]},{"cell_type":"code","source":["snli_data.head(2)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":112},"id":"_BeWP6JiI90c","executionInfo":{"status":"ok","timestamp":1684863034970,"user_tz":-180,"elapsed":43,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"bd699e06-19c3-481c-9ef0-a2d8cb0bf530"},"execution_count":50,"outputs":[{"output_type":"execute_result","data":{"text/plain":["                                                  premise  \\\n","451411  An older gentleman in an orange jumpsuit and g...   \n","70668               Boy and girl running along the beach.   \n","\n","                                    hypothesis  label  \n","451411  A man is cleaning up around his house.      1  \n","70668             Two people run on the beach.      0  "],"text/html":["\n","  <div id=\"df-aa72f312-5a97-4c12-a63c-ddba374a0138\">\n","    <div class=\"colab-df-container\">\n","      <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>premise</th>\n","      <th>hypothesis</th>\n","      <th>label</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>451411</th>\n","      <td>An older gentleman in an orange jumpsuit and g...</td>\n","      <td>A man is cleaning up around his house.</td>\n","      <td>1</td>\n","    </tr>\n","    <tr>\n","      <th>70668</th>\n","      <td>Boy and girl running along the beach.</td>\n","      <td>Two people run on the beach.</td>\n","      <td>0</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>\n","      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-aa72f312-5a97-4c12-a63c-ddba374a0138')\"\n","              title=\"Convert this dataframe to an interactive table.\"\n","              style=\"display:none;\">\n","        \n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","       width=\"24px\">\n","    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n","    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n","  </svg>\n","      </button>\n","      \n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      flex-wrap:wrap;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","      <script>\n","        const buttonEl =\n","          document.querySelector('#df-aa72f312-5a97-4c12-a63c-ddba374a0138 button.colab-df-convert');\n","        buttonEl.style.display =\n","          google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","        async function convertToInteractive(key) {\n","          const element = document.querySelector('#df-aa72f312-5a97-4c12-a63c-ddba374a0138');\n","          const dataTable =\n","            await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                     [key], {});\n","          if (!dataTable) return;\n","\n","          const docLinkHtml = 'Like what you see? Visit the ' +\n","            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","            + ' to learn more about interactive tables.';\n","          element.innerHTML = '';\n","          dataTable['output_type'] = 'display_data';\n","          await google.colab.output.renderOutput(dataTable, element);\n","          const docLink = document.createElement('div');\n","          docLink.innerHTML = docLinkHtml;\n","          element.appendChild(docLink);\n","        }\n","      </script>\n","    </div>\n","  </div>\n","  "]},"metadata":{},"execution_count":50}]},{"cell_type":"code","source":["snli_data.label.value_counts()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"2cYHx5IVJGOc","executionInfo":{"status":"ok","timestamp":1684863034970,"user_tz":-180,"elapsed":40,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"d589207d-c2df-41e3-bf04-0dfd4ebe2bbf"},"execution_count":51,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" 2    1904\n"," 0    1812\n"," 1    1783\n","-1       3\n","Name: label, dtype: int64"]},"metadata":{},"execution_count":51}]},{"cell_type":"code","source":["snli_data= snli_data[snli_data.label>-1]"],"metadata":{"id":"MfUyDVa1JMjH","executionInfo":{"status":"ok","timestamp":1684863034970,"user_tz":-180,"elapsed":33,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}}},"execution_count":52,"outputs":[]},{"cell_type":"code","source":["names=dataset[\"train\"].features[\"label\"].names\n","names"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"hS33IugLWz7y","executionInfo":{"status":"ok","timestamp":1684863034971,"user_tz":-180,"elapsed":33,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"c2899345-5f6b-4ec4-ecf9-b18fd113bfbb"},"execution_count":53,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['entailment', 'neutral', 'contradiction']"]},"metadata":{},"execution_count":53}]},{"cell_type":"code","source":["mapp=dict(enumerate(names))\n","mapp"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"LZIzc-gvXpme","executionInfo":{"status":"ok","timestamp":1684863034971,"user_tz":-180,"elapsed":28,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"5ff82225-7440-4212-b74d-8e62bb652ee6"},"execution_count":54,"outputs":[{"output_type":"execute_result","data":{"text/plain":["{0: 'entailment', 1: 'neutral', 2: 'contradiction'}"]},"metadata":{},"execution_count":54}]},{"cell_type":"code","source":["snli_dataframe= pd.DataFrame(snli_data)\n","snli_dataframe[\"text\"]= snli_dataframe\\\n","      .apply(lambda x: \"S1:\" +x.premise +\"S2:\"+x.hypothesis+\n","             \" S1 and S2 are labeled as entailment, neutral or contradiction\", \n","            axis=1)\n","snli_dataframe[\"label\"]=snli_dataframe\\\n","        .apply(lambda x: f\"They are {mapp[x.label]}\", axis=1)"],"metadata":{"id":"kuh-mX_bYqUw","executionInfo":{"status":"ok","timestamp":1684863035945,"user_tz":-180,"elapsed":998,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}}},"execution_count":55,"outputs":[]},{"cell_type":"code","source":["snli_dataframe.shape[0]"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"FssCotx8ahta","executionInfo":{"status":"ok","timestamp":1684863035946,"user_tz":-180,"elapsed":22,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"38c7e249-78ea-4de5-9017-b67f2dcc0684"},"execution_count":56,"outputs":[{"output_type":"execute_result","data":{"text/plain":["5499"]},"metadata":{},"execution_count":56}]},{"cell_type":"code","source":["from datasets import load_dataset, Dataset, DatasetDict\n","snli_data2= DatasetDict({\"train\": Dataset.from_pandas(snli_dataframe[:400]),\n","                        \"validation\": Dataset.from_pandas(snli_dataframe[400:]),\n","                        })"],"metadata":{"id":"sLUSmL3ZaZvg","executionInfo":{"status":"ok","timestamp":1684863035947,"user_tz":-180,"elapsed":20,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}}},"execution_count":57,"outputs":[]},{"cell_type":"code","source":["import datasets\n","text_column = \"text\"\n","label_column = \"label\"\n","max_length = 512\n","max_target_len=32\n","\n","processed_datasets = snli_data2.map(\n","    preprocess_function,\n","    batched=True,\n","    num_proc=1,\n","    remove_columns=snli_data2[\"train\"].column_names,\n","    load_from_cache_file=False,\n","  )\n","\n","train_dataset = processed_datasets[\"train\"]\n","eval_dataset = processed_datasets[\"validation\"]"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":17,"referenced_widgets":["8a159fbe557442fd88c90fab4ac89a73","e01b9ea70d1c47309049add36cd51f23","7c141986b4124973be44466e631847c5","be48a134036c4a23994f62ed7aec397f","0eeb221d7e0b42c695fb70f9b351e8e6","5e28df63028b426da963f70f33c560d6","f6394ad7c2e447a39031524bb7e8bee7","82d30fc6fc2e46ada32af5ddc808951b","4e1a6d0b34b144338eae1b4882a1f3aa","dff211d5707f41e0a202a04792c8cb43","f72483b283c546b7b71a29ade27a4e09","c0aa0092cbd74be086ca4ab035b334dd","06c36a3b801e431bacc6f47a69c59968","21b6933ac5eb4ec296f503e29ef93ebe","4c5cca197ab24de7a30ed0b7801c9a04","4e61dc0eeaab4d0bbb734d34c669dd00","e0522898541148e58a145ecbb02e2431","c3d1fe7d3db3413cab3a22e0f41f653c","a4837543fc0342629feb381d84d45deb","0c079df05e3a4bc29cbd3e191f5d62b5","8046a9c516744d328923a3efe4b08cd3","d896354ae62d4a2694842b0a69268025"]},"id":"aVi7UszdH3hZ","executionInfo":{"status":"ok","timestamp":1684863036514,"user_tz":-180,"elapsed":586,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"436355c3-e4db-4832-c9fd-4ec8ca9360ac"},"execution_count":58,"outputs":[{"output_type":"display_data","data":{"text/plain":["Map:   0%|          | 0/400 [00:00<?, ? examples/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"8a159fbe557442fd88c90fab4ac89a73"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Map:   0%|          | 0/5099 [00:00<?, ? examples/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"c0aa0092cbd74be086ca4ab035b334dd"}},"metadata":{}}]},{"cell_type":"code","source":["snli_data2[\"train\"].column_names"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"5rBE8n60bYuB","executionInfo":{"status":"ok","timestamp":1684863040305,"user_tz":-180,"elapsed":3,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"0831526b-06a7-42f8-b1e1-5e5abdf2da12"},"execution_count":59,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['premise', 'hypothesis', 'label', 'text', '__index_level_0__']"]},"metadata":{},"execution_count":59}]},{"cell_type":"code","source":["from torch.utils.data import DataLoader\n","from transformers import default_data_collator,  get_linear_schedule_with_warmup\n","batch_size=16\n","train_dataloader = DataLoader(\n","    train_dataset, shuffle=True, collate_fn=default_data_collator, batch_size=batch_size, pin_memory=True\n",")\n","eval_dataloader = DataLoader(eval_dataset, collate_fn=default_data_collator, batch_size=batch_size, pin_memory=True)"],"metadata":{"id":"Bpm0Aob-o-6q","executionInfo":{"status":"ok","timestamp":1684863068555,"user_tz":-180,"elapsed":461,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}}},"execution_count":60,"outputs":[]},{"cell_type":"markdown","source":[],"metadata":{"id":"2TTMQePJpXw9"}},{"cell_type":"code","source":["# optimizer and lr scheduler\n","from tqdm import tqdm\n","\n","device=\"cuda\"\n","global_step = 0\n","num_epochs=3\n","lr = 1e-3\n","\n","optimizer = torch.optim.AdamW(model.parameters(), lr=lr)\n","lr_scheduler = get_linear_schedule_with_warmup(\n","    optimizer=optimizer,\n","    num_warmup_steps=0,\n","    num_training_steps=(len(train_dataloader) * num_epochs),\n",")\n","\n","#model.base_model.peft_config.total_step = len(train_dataloader) * num_epochs\n","\n","\n","# training and evaluation\n","#model = model.to(device)\n","\n","for epoch in range(num_epochs):\n","    model.train()\n","    total_loss = 0\n","    for step, batch in enumerate(tqdm(train_dataloader)):\n","        batch = {k: v.to(device) for k, v in batch.items()}\n","        outputs = model(**batch)\n","        loss = outputs.loss\n","        total_loss += loss.detach().float()\n","        loss.backward()\n","        optimizer.step()\n","        lr_scheduler.step()\n","        # Update the importance of low-rank matrices\n","        # and allocate the budget accordingly.\n","        model.base_model.update_and_allocate(global_step)\n","        optimizer.zero_grad()\n","        global_step += 1\n","\n","    model.eval()\n","    eval_loss = 0\n","    eval_preds = []\n","    for step, batch in enumerate(tqdm(eval_dataloader)):\n","        batch = {k: v.to(device) for k, v in batch.items()}\n","        with torch.no_grad():\n","            outputs = model(**batch)\n","        loss = outputs.loss\n","        eval_loss += loss.detach().float()\n","        eval_preds.extend(\n","            tokenizer.batch_decode(torch.argmax(outputs.logits, -1).detach().cpu().numpy(), skip_special_tokens=True)\n","        )\n","\n","    eval_epoch_loss = eval_loss / len(train_dataloader)\n","    eval_ppl = torch.exp(eval_epoch_loss)\n","    train_epoch_loss = total_loss / len(eval_dataloader)\n","    train_ppl = torch.exp(train_epoch_loss)\n","    print(f\"{epoch=}: {train_ppl=} {train_epoch_loss=} {eval_ppl=} {eval_epoch_loss=}\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":837},"id":"8bz3LI6io87J","executionInfo":{"status":"error","timestamp":1684863072654,"user_tz":-180,"elapsed":526,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"a0a66d89-3b61-456c-a90b-ff7ea6192dba"},"execution_count":61,"outputs":[{"output_type":"stream","name":"stderr","text":["  0%|          | 0/25 [00:00<?, ?it/s]\n"]},{"output_type":"display_data","data":{"text/plain":["\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/peft/tuners/\u001b[0m\u001b[1;33mlora.py\u001b[0m:\u001b[94m278\u001b[0m in \u001b[92m__getattr__\u001b[0m                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m275 \u001b[0m\u001b[2m│   \u001b[0m\u001b[94mdef\u001b[0m \u001b[92m__getattr__\u001b[0m(\u001b[96mself\u001b[0m, name: \u001b[96mstr\u001b[0m):                                                      \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m276 \u001b[0m\u001b[2;90m│   │   \u001b[0m\u001b[33m\"\"\"Forward missing attributes to the wrapped module.\"\"\"\u001b[0m                            \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m277 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mtry\u001b[0m:                                                                               \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m278 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96msuper\u001b[0m().\u001b[92m__getattr__\u001b[0m(name)  \u001b[2m# defer to nn.Module's logic\u001b[0m                 \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m279 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mexcept\u001b[0m \u001b[96mAttributeError\u001b[0m:                                                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m280 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96mgetattr\u001b[0m(\u001b[96mself\u001b[0m.model, name)                                               \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m281 \u001b[0m                                                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1614\u001b[0m in \u001b[92m__getattr__\u001b[0m           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1611 \u001b[0m\u001b[2m│   │   │   \u001b[0mmodules = \u001b[96mself\u001b[0m.\u001b[91m__dict__\u001b[0m[\u001b[33m'\u001b[0m\u001b[33m_modules\u001b[0m\u001b[33m'\u001b[0m]                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1612 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mif\u001b[0m name \u001b[95min\u001b[0m modules:                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1613 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[94mreturn\u001b[0m modules[name]                                                      \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1614 \u001b[2m│   │   \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mAttributeError\u001b[0m(\u001b[33m\"\u001b[0m\u001b[33m'\u001b[0m\u001b[33m{}\u001b[0m\u001b[33m'\u001b[0m\u001b[33m object has no attribute \u001b[0m\u001b[33m'\u001b[0m\u001b[33m{}\u001b[0m\u001b[33m'\u001b[0m\u001b[33m\"\u001b[0m.format(                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1615 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[96mtype\u001b[0m(\u001b[96mself\u001b[0m).\u001b[91m__name__\u001b[0m, name))                                                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1616 \u001b[0m\u001b[2m│   \u001b[0m                                                                                      \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1617 \u001b[0m\u001b[2m│   \u001b[0m\u001b[94mdef\u001b[0m \u001b[92m__setattr__\u001b[0m(\u001b[96mself\u001b[0m, name: \u001b[96mstr\u001b[0m, value: Union[Tensor, \u001b[33m'\u001b[0m\u001b[33mModule\u001b[0m\u001b[33m'\u001b[0m]) -> \u001b[94mNone\u001b[0m:             \u001b[31m│\u001b[0m\n","\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n","\u001b[1;91mAttributeError: \u001b[0m\u001b[32m'LoraModel'\u001b[0m object has no attribute \u001b[32m'update_and_allocate'\u001b[0m\n","\n","\u001b[3mDuring handling of the above exception, another exception occurred:\u001b[0m\n","\n","\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n","\u001b[31m│\u001b[0m in \u001b[92m<cell line: 22>\u001b[0m:\u001b[94m35\u001b[0m                                                                            \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/peft/tuners/\u001b[0m\u001b[1;33mlora.py\u001b[0m:\u001b[94m280\u001b[0m in \u001b[92m__getattr__\u001b[0m                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m277 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mtry\u001b[0m:                                                                               \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m278 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96msuper\u001b[0m().\u001b[92m__getattr__\u001b[0m(name)  \u001b[2m# defer to nn.Module's logic\u001b[0m                 \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m279 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mexcept\u001b[0m \u001b[96mAttributeError\u001b[0m:                                                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m280 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96mgetattr\u001b[0m(\u001b[96mself\u001b[0m.model, name)                                               \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m281 \u001b[0m\u001b[2m│   \u001b[0m                                                                                       \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m282 \u001b[0m\u001b[2m│   \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mget_peft_config_as_dict\u001b[0m(\u001b[96mself\u001b[0m, inference: \u001b[96mbool\u001b[0m = \u001b[94mFalse\u001b[0m):                            \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m283 \u001b[0m\u001b[2m│   │   \u001b[0mconfig_dict = {}                                                                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1614\u001b[0m in \u001b[92m__getattr__\u001b[0m           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1611 \u001b[0m\u001b[2m│   │   │   \u001b[0mmodules = \u001b[96mself\u001b[0m.\u001b[91m__dict__\u001b[0m[\u001b[33m'\u001b[0m\u001b[33m_modules\u001b[0m\u001b[33m'\u001b[0m]                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1612 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mif\u001b[0m name \u001b[95min\u001b[0m modules:                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1613 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[94mreturn\u001b[0m modules[name]                                                      \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1614 \u001b[2m│   │   \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mAttributeError\u001b[0m(\u001b[33m\"\u001b[0m\u001b[33m'\u001b[0m\u001b[33m{}\u001b[0m\u001b[33m'\u001b[0m\u001b[33m object has no attribute \u001b[0m\u001b[33m'\u001b[0m\u001b[33m{}\u001b[0m\u001b[33m'\u001b[0m\u001b[33m\"\u001b[0m.format(                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1615 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[96mtype\u001b[0m(\u001b[96mself\u001b[0m).\u001b[91m__name__\u001b[0m, name))                                                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1616 \u001b[0m\u001b[2m│   \u001b[0m                                                                                      \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1617 \u001b[0m\u001b[2m│   \u001b[0m\u001b[94mdef\u001b[0m \u001b[92m__setattr__\u001b[0m(\u001b[96mself\u001b[0m, name: \u001b[96mstr\u001b[0m, value: Union[Tensor, \u001b[33m'\u001b[0m\u001b[33mModule\u001b[0m\u001b[33m'\u001b[0m]) -> \u001b[94mNone\u001b[0m:             \u001b[31m│\u001b[0m\n","\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n","\u001b[1;91mAttributeError: \u001b[0m\u001b[32m'T5ForConditionalGeneration'\u001b[0m object has no attribute \u001b[32m'update_and_allocate'\u001b[0m\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">╭─────────────────────────────── </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">Traceback </span><span style=\"color: #bf7f7f; text-decoration-color: #bf7f7f; font-weight: bold\">(most recent call last)</span><span style=\"color: #800000; text-decoration-color: #800000\"> ────────────────────────────────╮</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/peft/tuners/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">lora.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">278</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">__getattr__</span>                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">275 │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">def</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">__getattr__</span>(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>, name: <span style=\"color: #00ffff; text-decoration-color: #00ffff\">str</span>):                                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">276 </span><span style=\"color: #bfbfbf; text-decoration-color: #bfbfbf\">│   │   </span><span style=\"color: #808000; text-decoration-color: #808000\">\"\"\"Forward missing attributes to the wrapped module.\"\"\"</span>                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">277 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">try</span>:                                                                               <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>278 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">super</span>().<span style=\"color: #00ff00; text-decoration-color: #00ff00\">__getattr__</span>(name)  <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"># defer to nn.Module's logic</span>                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">279 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">except</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">AttributeError</span>:                                                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">280 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">getattr</span>(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.model, name)                                               <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">281 </span>                                                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/nn/modules/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">module.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1614</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">__getattr__</span>           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1611 │   │   │   </span>modules = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.<span style=\"color: #ff0000; text-decoration-color: #ff0000\">__dict__</span>[<span style=\"color: #808000; text-decoration-color: #808000\">'_modules'</span>]                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1612 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> name <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">in</span> modules:                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1613 │   │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> modules[name]                                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1614 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">raise</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">AttributeError</span>(<span style=\"color: #808000; text-decoration-color: #808000\">\"'{}' object has no attribute '{}'\"</span>.format(                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1615 │   │   │   </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">type</span>(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>).<span style=\"color: #ff0000; text-decoration-color: #ff0000\">__name__</span>, name))                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1616 │   </span>                                                                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1617 │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">def</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">__setattr__</span>(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>, name: <span style=\"color: #00ffff; text-decoration-color: #00ffff\">str</span>, value: Union[Tensor, <span style=\"color: #808000; text-decoration-color: #808000\">'Module'</span>]) -&gt; <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span>:             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">╰──────────────────────────────────────────────────────────────────────────────────────────────────╯</span>\n","<span style=\"color: #ff0000; text-decoration-color: #ff0000; font-weight: bold\">AttributeError: </span><span style=\"color: #008000; text-decoration-color: #008000\">'LoraModel'</span> object has no attribute <span style=\"color: #008000; text-decoration-color: #008000\">'update_and_allocate'</span>\n","\n","<span style=\"font-style: italic\">During handling of the above exception, another exception occurred:</span>\n","\n","<span style=\"color: #800000; text-decoration-color: #800000\">╭─────────────────────────────── </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">Traceback </span><span style=\"color: #bf7f7f; text-decoration-color: #bf7f7f; font-weight: bold\">(most recent call last)</span><span style=\"color: #800000; text-decoration-color: #800000\"> ────────────────────────────────╮</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">&lt;cell line: 22&gt;</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">35</span>                                                                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/peft/tuners/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">lora.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">280</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">__getattr__</span>                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">277 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">try</span>:                                                                               <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">278 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">super</span>().<span style=\"color: #00ff00; text-decoration-color: #00ff00\">__getattr__</span>(name)  <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"># defer to nn.Module's logic</span>                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">279 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">except</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">AttributeError</span>:                                                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>280 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">getattr</span>(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.model, name)                                               <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">281 │   </span>                                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">282 │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">def</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">get_peft_config_as_dict</span>(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>, inference: <span style=\"color: #00ffff; text-decoration-color: #00ffff\">bool</span> = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">False</span>):                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">283 │   │   </span>config_dict = {}                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/nn/modules/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">module.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1614</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">__getattr__</span>           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1611 │   │   │   </span>modules = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.<span style=\"color: #ff0000; text-decoration-color: #ff0000\">__dict__</span>[<span style=\"color: #808000; text-decoration-color: #808000\">'_modules'</span>]                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1612 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> name <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">in</span> modules:                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1613 │   │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> modules[name]                                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1614 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">raise</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">AttributeError</span>(<span style=\"color: #808000; text-decoration-color: #808000\">\"'{}' object has no attribute '{}'\"</span>.format(                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1615 │   │   │   </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">type</span>(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>).<span style=\"color: #ff0000; text-decoration-color: #ff0000\">__name__</span>, name))                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1616 │   </span>                                                                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1617 │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">def</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">__setattr__</span>(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>, name: <span style=\"color: #00ffff; text-decoration-color: #00ffff\">str</span>, value: Union[Tensor, <span style=\"color: #808000; text-decoration-color: #808000\">'Module'</span>]) -&gt; <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span>:             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">╰──────────────────────────────────────────────────────────────────────────────────────────────────╯</span>\n","<span style=\"color: #ff0000; text-decoration-color: #ff0000; font-weight: bold\">AttributeError: </span><span style=\"color: #008000; text-decoration-color: #008000\">'T5ForConditionalGeneration'</span> object has no attribute <span style=\"color: #008000; text-decoration-color: #008000\">'update_and_allocate'</span>\n","</pre>\n"]},"metadata":{}}]},{"cell_type":"code","source":["#pip install evaluate rouge_score"],"metadata":{"id":"eLFmQhzbN5jW","executionInfo":{"status":"ok","timestamp":1684861847001,"user_tz":-180,"elapsed":8,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}}},"execution_count":24,"outputs":[]},{"cell_type":"code","source":["#import evaluate\n","#metric1 = evaluate.load(\"rouge\")\n","#metric2 = evaluate.load(\"f1\")\n","\n","#import nltk\n","import numpy as np\n","from transformers import AutoTokenizer, DataCollatorForSeq2Seq\n","from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer\n","\n","\n","    \n"," #model = AutoModelForSeq2SeqLM.from_pretrained(\"t5-base\")\n","data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)\n","\n","training_args = Seq2SeqTrainingArguments(\n","    output_dir=\"./results\",\n","    evaluation_strategy=\"epoch\",\n","   save_strategy=\"epoch\",\n","    learning_rate=2e-5,\n","    per_device_train_batch_size=8,\n","    per_device_eval_batch_size=8,\n","    weight_decay=0.01,\n","    save_total_limit=3,\n","    num_train_epochs=4,\n","    #fp16=True,\n","    #load_best_model_at_end=True,\n","    predict_with_generate=False\n",")\n","\n","trainer = Seq2SeqTrainer(\n","    model=model,\n","    args=training_args,\n","    train_dataset= train_dataset,\n","    eval_dataset= eval_dataset,\n","    tokenizer=tokenizer,\n","    data_collator=data_collator\n",")\n","\n","res_t=trainer.train()"],"metadata":{"id":"YKeu8y9CAen-","colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"status":"error","timestamp":1684862642661,"user_tz":-180,"elapsed":12527,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"e5ba8f27-551f-4977-bdf0-6fd2845c737e"},"execution_count":30,"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/transformers/optimization.py:407: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n","  warnings.warn(\n"]},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["\n","    <div>\n","      \n","      <progress value='51' max='200' style='width:300px; height:20px; vertical-align: middle;'></progress>\n","      [ 51/200 00:07 < 00:21, 6.99 it/s, Epoch 1/4]\n","    </div>\n","    <table border=\"1\" class=\"dataframe\">\n","  <thead>\n"," <tr style=\"text-align: left;\">\n","      <th>Epoch</th>\n","      <th>Training Loss</th>\n","      <th>Validation Loss</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","  </tbody>\n","</table><p>\n","    <div>\n","      \n","      <progress value='24' max='637' style='width:300px; height:20px; vertical-align: middle;'></progress>\n","      [ 24/637 00:01 < 00:45, 13.59 it/s]\n","    </div>\n","    "]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n","\u001b[31m│\u001b[0m in \u001b[92m<cell line: 39>\u001b[0m:\u001b[94m39\u001b[0m                                                                            \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m1664\u001b[0m in \u001b[92mtrain\u001b[0m                    \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1661 \u001b[0m\u001b[2m│   │   \u001b[0minner_training_loop = find_executable_batch_size(                                 \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1662 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[96mself\u001b[0m._inner_training_loop, \u001b[96mself\u001b[0m._train_batch_size, args.auto_find_batch_size  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1663 \u001b[0m\u001b[2m│   │   \u001b[0m)                                                                                 \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1664 \u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m inner_training_loop(                                                       \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1665 \u001b[0m\u001b[2m│   │   │   \u001b[0margs=args,                                                                    \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1666 \u001b[0m\u001b[2m│   │   │   \u001b[0mresume_from_checkpoint=resume_from_checkpoint,                                \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1667 \u001b[0m\u001b[2m│   │   │   \u001b[0mtrial=trial,                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m2034\u001b[0m in \u001b[92m_inner_training_loop\u001b[0m     \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2031 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[96mself\u001b[0m.control.should_training_stop = \u001b[94mTrue\u001b[0m                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2032 \u001b[0m\u001b[2m│   │   │   \u001b[0m                                                                              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2033 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[96mself\u001b[0m.control = \u001b[96mself\u001b[0m.callback_handler.on_epoch_end(args, \u001b[96mself\u001b[0m.state, \u001b[96mself\u001b[0m.con  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2034 \u001b[2m│   │   │   \u001b[0m\u001b[96mself\u001b[0m._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2035 \u001b[0m\u001b[2m│   │   │   \u001b[0m                                                                              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2036 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mif\u001b[0m DebugOption.TPU_METRICS_DEBUG \u001b[95min\u001b[0m \u001b[96mself\u001b[0m.args.debug:                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2037 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[94mif\u001b[0m is_torch_tpu_available():                                              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m2300\u001b[0m in \u001b[92m_maybe_log_save_evaluate\u001b[0m \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2297 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0m)                                                                     \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2298 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0mmetrics.update(dataset_metrics)                                       \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2299 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                         \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2300 \u001b[2m│   │   │   │   \u001b[0mmetrics = \u001b[96mself\u001b[0m.evaluate(ignore_keys=ignore_keys_for_eval)                 \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2301 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[96mself\u001b[0m._report_to_hp_search(trial, \u001b[96mself\u001b[0m.state.global_step, metrics)             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2302 \u001b[0m\u001b[2m│   │   │   \u001b[0m                                                                              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2303 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[2m# Run delayed LR scheduler now that metrics are populated\u001b[0m                     \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer_seq2seq.py\u001b[0m:\u001b[94m159\u001b[0m in \u001b[92mevaluate\u001b[0m          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m156 \u001b[0m\u001b[2m│   │   \u001b[0m)                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m157 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[96mself\u001b[0m._gen_kwargs = gen_kwargs                                                      \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m158 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m159 \u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96msuper\u001b[0m().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m160 \u001b[0m\u001b[2m│   \u001b[0m                                                                                       \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m161 \u001b[0m\u001b[2m│   \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mpredict\u001b[0m(                                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m162 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[96mself\u001b[0m,                                                                              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m3029\u001b[0m in \u001b[92mevaluate\u001b[0m                 \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3026 \u001b[0m\u001b[2m│   │   \u001b[0mstart_time = time.time()                                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3027 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3028 \u001b[0m\u001b[2m│   │   \u001b[0meval_loop = \u001b[96mself\u001b[0m.prediction_loop \u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.args.use_legacy_prediction_loop \u001b[94melse\u001b[0m \u001b[96mse\u001b[0m  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m3029 \u001b[2m│   │   \u001b[0moutput = eval_loop(                                                               \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3030 \u001b[0m\u001b[2m│   │   │   \u001b[0meval_dataloader,                                                              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3031 \u001b[0m\u001b[2m│   │   │   \u001b[0mdescription=\u001b[33m\"\u001b[0m\u001b[33mEvaluation\u001b[0m\u001b[33m\"\u001b[0m,                                                     \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3032 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[2m# No point gathering the predictions if there are no metrics, otherwise we d\u001b[0m  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m3210\u001b[0m in \u001b[92mevaluation_loop\u001b[0m          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3207 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0mbatch_size = observed_batch_size                                      \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3208 \u001b[0m\u001b[2m│   │   │   \u001b[0m                                                                              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3209 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[2m# Prediction step\u001b[0m                                                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m3210 \u001b[2m│   │   │   \u001b[0mloss, logits, labels = \u001b[96mself\u001b[0m.prediction_step(model, inputs, prediction_loss_o  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3211 \u001b[0m\u001b[2m│   │   │   \u001b[0minputs_decode = \u001b[96mself\u001b[0m._prepare_input(inputs[\u001b[33m\"\u001b[0m\u001b[33minput_ids\u001b[0m\u001b[33m\"\u001b[0m]) \u001b[94mif\u001b[0m args.include_inp  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3212 \u001b[0m\u001b[2m│   │   │   \u001b[0m                                                                              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3213 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mif\u001b[0m is_torch_tpu_available():                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer_seq2seq.py\u001b[0m:\u001b[94m247\u001b[0m in \u001b[92mprediction_step\u001b[0m   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m244 \u001b[0m\u001b[2;33m│   │   \u001b[0m\u001b[33m\"\"\"\u001b[0m                                                                                \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m245 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m246 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m \u001b[96mself\u001b[0m.args.predict_with_generate \u001b[95mor\u001b[0m prediction_loss_only:                    \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m247 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96msuper\u001b[0m().prediction_step(                                                \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m248 \u001b[0m\u001b[2m│   │   │   │   \u001b[0mmodel, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ig   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m249 \u001b[0m\u001b[2m│   │   │   \u001b[0m)                                                                              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m250 \u001b[0m                                                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m3466\u001b[0m in \u001b[92mprediction_step\u001b[0m          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3463 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                         \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3464 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[94mif\u001b[0m has_labels \u001b[95mor\u001b[0m loss_without_labels:                                     \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3465 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0m\u001b[94mwith\u001b[0m \u001b[96mself\u001b[0m.compute_loss_context_manager():                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m3466 \u001b[2m│   │   │   │   │   │   \u001b[0mloss, outputs = \u001b[96mself\u001b[0m.compute_loss(model, inputs, return_outputs=  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3467 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0mloss = loss.mean().detach()                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3468 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0m                                                                      \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m3469 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[96misinstance\u001b[0m(outputs, \u001b[96mdict\u001b[0m):                                         \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m2767\u001b[0m in \u001b[92mcompute_loss\u001b[0m             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2764 \u001b[0m\u001b[2m│   │   │   \u001b[0mlabels = inputs.pop(\u001b[33m\"\u001b[0m\u001b[33mlabels\u001b[0m\u001b[33m\"\u001b[0m)                                                 \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2765 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2766 \u001b[0m\u001b[2m│   │   │   \u001b[0mlabels = \u001b[94mNone\u001b[0m                                                                 \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2767 \u001b[2m│   │   \u001b[0moutputs = model(**inputs)                                                         \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2768 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# Save past state if it exists\u001b[0m                                                    \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2769 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# TODO: this needs to be fixed and made cleaner later.\u001b[0m                            \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m2770 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.args.past_index >= \u001b[94m0\u001b[0m:                                                     \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m            \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1498 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1499 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1500 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks):                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1502 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1503 \u001b[0m\u001b[2m│   │   \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], []                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1504 \u001b[0m\u001b[2m│   │   \u001b[0mbackward_pre_hooks = []                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m162 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad():                                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m163 \u001b[0m\u001b[2m│   │   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                      \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m164 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m166 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output)                                \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m167 \u001b[0m\u001b[2m│   \u001b[0m                                                                                       \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m168 \u001b[0m\u001b[2m│   \u001b[0mmodule.forward = new_forward                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/t5/\u001b[0m\u001b[1;33mmodeling_t5.py\u001b[0m:\u001b[94m1716\u001b[0m in \u001b[92mforward\u001b[0m    \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1713 \u001b[0m\u001b[2m│   │   │   │   \u001b[0mdecoder_attention_mask = decoder_attention_mask.to(\u001b[96mself\u001b[0m.decoder.first_de  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1714 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1715 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# Decode\u001b[0m                                                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1716 \u001b[2m│   │   \u001b[0mdecoder_outputs = \u001b[96mself\u001b[0m.decoder(                                                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1717 \u001b[0m\u001b[2m│   │   │   \u001b[0minput_ids=decoder_input_ids,                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1718 \u001b[0m\u001b[2m│   │   │   \u001b[0mattention_mask=decoder_attention_mask,                                        \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1719 \u001b[0m\u001b[2m│   │   │   \u001b[0minputs_embeds=decoder_inputs_embeds,                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m            \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1498 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1499 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1500 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks):                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1502 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1503 \u001b[0m\u001b[2m│   │   \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], []                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1504 \u001b[0m\u001b[2m│   │   \u001b[0mbackward_pre_hooks = []                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m162 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad():                                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m163 \u001b[0m\u001b[2m│   │   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                      \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m164 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m166 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output)                                \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m167 \u001b[0m\u001b[2m│   \u001b[0m                                                                                       \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m168 \u001b[0m\u001b[2m│   \u001b[0mmodule.forward = new_forward                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/t5/\u001b[0m\u001b[1;33mmodeling_t5.py\u001b[0m:\u001b[94m1086\u001b[0m in \u001b[92mforward\u001b[0m    \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1083 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0m\u001b[94mNone\u001b[0m,  \u001b[2m# past_key_value is always None with gradient checkpointing\u001b[0m    \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1084 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m)                                                                         \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1085 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                         \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1086 \u001b[2m│   │   │   │   \u001b[0mlayer_outputs = layer_module(                                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1087 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0mhidden_states,                                                        \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1088 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0mattention_mask=extended_attention_mask,                               \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1089 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0mposition_bias=position_bias,                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m            \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1498 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1499 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1500 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks):                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1502 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1503 \u001b[0m\u001b[2m│   │   \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], []                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1504 \u001b[0m\u001b[2m│   │   \u001b[0mbackward_pre_hooks = []                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m162 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad():                                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m163 \u001b[0m\u001b[2m│   │   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                      \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m164 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m166 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output)                                \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m167 \u001b[0m\u001b[2m│   \u001b[0m                                                                                       \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m168 \u001b[0m\u001b[2m│   \u001b[0mmodule.forward = new_forward                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/t5/\u001b[0m\u001b[1;33mmodeling_t5.py\u001b[0m:\u001b[94m693\u001b[0m in \u001b[92mforward\u001b[0m     \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 690 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 691 \u001b[0m\u001b[2m│   │   │   \u001b[0mself_attn_past_key_value, cross_attn_past_key_value = \u001b[94mNone\u001b[0m, \u001b[94mNone\u001b[0m              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 692 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 693 \u001b[2m│   │   \u001b[0mself_attention_outputs = \u001b[96mself\u001b[0m.layer[\u001b[94m0\u001b[0m](                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 694 \u001b[0m\u001b[2m│   │   │   \u001b[0mhidden_states,                                                                \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 695 \u001b[0m\u001b[2m│   │   │   \u001b[0mattention_mask=attention_mask,                                                \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 696 \u001b[0m\u001b[2m│   │   │   \u001b[0mposition_bias=position_bias,                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m            \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1498 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1499 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1500 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks):                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1502 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1503 \u001b[0m\u001b[2m│   │   \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], []                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1504 \u001b[0m\u001b[2m│   │   \u001b[0mbackward_pre_hooks = []                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m162 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad():                                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m163 \u001b[0m\u001b[2m│   │   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                      \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m164 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m166 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output)                                \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m167 \u001b[0m\u001b[2m│   \u001b[0m                                                                                       \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m168 \u001b[0m\u001b[2m│   \u001b[0mmodule.forward = new_forward                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/t5/\u001b[0m\u001b[1;33mmodeling_t5.py\u001b[0m:\u001b[94m600\u001b[0m in \u001b[92mforward\u001b[0m     \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 597 \u001b[0m\u001b[2m│   │   \u001b[0moutput_attentions=\u001b[94mFalse\u001b[0m,                                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 598 \u001b[0m\u001b[2m│   \u001b[0m):                                                                                    \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 599 \u001b[0m\u001b[2m│   │   \u001b[0mnormed_hidden_states = \u001b[96mself\u001b[0m.layer_norm(hidden_states)                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 600 \u001b[2m│   │   \u001b[0mattention_output = \u001b[96mself\u001b[0m.SelfAttention(                                            \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 601 \u001b[0m\u001b[2m│   │   │   \u001b[0mnormed_hidden_states,                                                         \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 602 \u001b[0m\u001b[2m│   │   │   \u001b[0mmask=attention_mask,                                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 603 \u001b[0m\u001b[2m│   │   │   \u001b[0mposition_bias=position_bias,                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m            \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1498 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1499 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1500 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks):                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1502 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1503 \u001b[0m\u001b[2m│   │   \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], []                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1504 \u001b[0m\u001b[2m│   │   \u001b[0mbackward_pre_hooks = []                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m162 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad():                                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m163 \u001b[0m\u001b[2m│   │   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                      \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m164 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m166 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output)                                \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m167 \u001b[0m\u001b[2m│   \u001b[0m                                                                                       \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m168 \u001b[0m\u001b[2m│   \u001b[0mmodule.forward = new_forward                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/t5/\u001b[0m\u001b[1;33mmodeling_t5.py\u001b[0m:\u001b[94m519\u001b[0m in \u001b[92mforward\u001b[0m     \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 516 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m hidden_states                                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 517 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 518 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# get query states\u001b[0m                                                                \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 519 \u001b[2m│   │   \u001b[0mquery_states = shape(\u001b[96mself\u001b[0m.q(hidden_states))  \u001b[2m# (batch_size, n_heads, seq_length,\u001b[0m  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 520 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 521 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# get key/value states\u001b[0m                                                            \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 522 \u001b[0m\u001b[2m│   │   \u001b[0mkey_states = project(                                                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m            \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1498 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1499 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1500 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks):                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1502 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1503 \u001b[0m\u001b[2m│   │   \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], []                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1504 \u001b[0m\u001b[2m│   │   \u001b[0mbackward_pre_hooks = []                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m162 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad():                                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m163 \u001b[0m\u001b[2m│   │   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                      \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m164 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m166 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output)                                \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m167 \u001b[0m\u001b[2m│   \u001b[0m                                                                                       \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m168 \u001b[0m\u001b[2m│   \u001b[0mmodule.forward = new_forward                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/nn/\u001b[0m\u001b[1;33mmodules.py\u001b[0m:\u001b[94m320\u001b[0m in \u001b[92mforward\u001b[0m                \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m317 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.bias \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m \u001b[95mand\u001b[0m \u001b[96mself\u001b[0m.bias.dtype != x.dtype:                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m318 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[96mself\u001b[0m.bias.data = \u001b[96mself\u001b[0m.bias.data.to(x.dtype)                                    \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m319 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m320 \u001b[2m│   │   \u001b[0mout = bnb.matmul(x, \u001b[96mself\u001b[0m.weight, bias=\u001b[96mself\u001b[0m.bias, state=\u001b[96mself\u001b[0m.state)                 \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m321 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m322 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m \u001b[96mself\u001b[0m.state.has_fp16_weights:                                                \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m323 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.state.CB \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m \u001b[95mand\u001b[0m \u001b[96mself\u001b[0m.state.CxB \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m:                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/\u001b[0m\u001b[1;33m_functions.py\u001b[0m:\u001b[94m500\u001b[0m in \u001b[92mmatmul\u001b[0m        \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m497 \u001b[0m\u001b[2m│   \u001b[0mstate = state \u001b[95mor\u001b[0m MatmulLtState()                                                       \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m498 \u001b[0m\u001b[2m│   \u001b[0m\u001b[94mif\u001b[0m threshold > \u001b[94m0.0\u001b[0m:                                                                    \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m499 \u001b[0m\u001b[2m│   │   \u001b[0mstate.threshold = threshold                                                        \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m500 \u001b[2m│   \u001b[0m\u001b[94mreturn\u001b[0m MatMul8bitLt.apply(A, B, out, bias, state)                                      \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m501 \u001b[0m                                                                                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/autograd/\u001b[0m\u001b[1;33mfunction.py\u001b[0m:\u001b[94m506\u001b[0m in \u001b[92mapply\u001b[0m                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m503 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m torch._C._are_functorch_transforms_active():                                \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m504 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[2m# See NOTE: [functorch vjp and autograd interaction]\u001b[0m                           \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m505 \u001b[0m\u001b[2m│   │   │   \u001b[0margs = _functorch.utils.unwrap_dead_wrappers(args)                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m506 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96msuper\u001b[0m().apply(*args, **kwargs)  \u001b[2m# type: ignore[misc]\u001b[0m                    \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m507 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m508 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[96mcls\u001b[0m.setup_context == _SingleLevelFunction.setup_context:                        \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m509 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mRuntimeError\u001b[0m(                                                            \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/\u001b[0m\u001b[1;33m_functions.py\u001b[0m:\u001b[94m397\u001b[0m in \u001b[92mforward\u001b[0m       \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m394 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# 3. Matmul\u001b[0m                                                                        \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m395 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m using_igemmlt:                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m396 \u001b[0m\u001b[2m│   │   │   \u001b[0mC32A, SA = F.transform(CA, \u001b[33m\"\u001b[0m\u001b[33mcol32\u001b[0m\u001b[33m\"\u001b[0m)                                            \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m397 \u001b[2m│   │   │   \u001b[0mout32, Sout32 = F.igemmlt(C32A, state.CxB, SA, state.SB)                       \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m398 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mif\u001b[0m bias \u001b[95mis\u001b[0m \u001b[94mNone\u001b[0m \u001b[95mor\u001b[0m bias.dtype == torch.float16:                                \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m399 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[2m# we apply the fused bias here\u001b[0m                                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m400 \u001b[0m\u001b[2m│   │   │   │   \u001b[0moutput = F.mm_dequant(out32, Sout32, SCA, state.SCB, bias=bias)            \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/\u001b[0m\u001b[1;33mfunctional.py\u001b[0m:\u001b[94m1391\u001b[0m in \u001b[92migemmlt\u001b[0m               \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1388 \u001b[0m\u001b[2m│   \u001b[0mprev_device = A.device                                                                \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1389 \u001b[0m\u001b[2m│   \u001b[0mtorch.cuda.set_device(A.device)                                                       \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1390 \u001b[0m\u001b[2m│   \u001b[0m                                                                                      \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1391 \u001b[2m│   \u001b[0mptr = CUBLAS_Context.get_instance().get_context(A.device)                             \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1392 \u001b[0m\u001b[2m│   \u001b[0mptrA = get_ptr(A)                                                                     \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1393 \u001b[0m\u001b[2m│   \u001b[0mptrB = get_ptr(B)                                                                     \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m1394 \u001b[0m\u001b[2m│   \u001b[0mptrC = get_ptr(out)                                                                   \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/\u001b[0m\u001b[1;33mfunctional.py\u001b[0m:\u001b[94m122\u001b[0m in \u001b[92mget_context\u001b[0m            \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 119 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[96mcls\u001b[0m._instance.initialize()                                                    \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 120 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96mcls\u001b[0m._instance                                                              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 121 \u001b[0m\u001b[2m│   \u001b[0m                                                                                      \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 122 \u001b[2m│   \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mget_context\u001b[0m(\u001b[96mself\u001b[0m, device):                                                        \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 123 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m device.index \u001b[95mnot\u001b[0m \u001b[95min\u001b[0m \u001b[96mself\u001b[0m.context:                                              \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 124 \u001b[0m\u001b[2m│   │   │   \u001b[0mprev_device = torch.cuda.current_device()                                     \u001b[31m│\u001b[0m\n","\u001b[31m│\u001b[0m   \u001b[2m 125 \u001b[0m\u001b[2m│   │   │   \u001b[0mtorch.cuda.set_device(device)                                                 \u001b[31m│\u001b[0m\n","\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n","\u001b[1;91mKeyboardInterrupt\u001b[0m\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">╭─────────────────────────────── </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">Traceback </span><span style=\"color: #bf7f7f; text-decoration-color: #bf7f7f; font-weight: bold\">(most recent call last)</span><span style=\"color: #800000; text-decoration-color: #800000\"> ────────────────────────────────╮</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">&lt;cell line: 39&gt;</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">39</span>                                                                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">trainer.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1664</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">train</span>                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1661 │   │   </span>inner_training_loop = find_executable_batch_size(                                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1662 │   │   │   </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._inner_training_loop, <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._train_batch_size, args.auto_find_batch_size  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1663 │   │   </span>)                                                                                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1664 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> inner_training_loop(                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1665 │   │   │   </span>args=args,                                                                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1666 │   │   │   </span>resume_from_checkpoint=resume_from_checkpoint,                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1667 │   │   │   </span>trial=trial,                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">trainer.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">2034</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_inner_training_loop</span>     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2031 │   │   │   │   </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.control.should_training_stop = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">True</span>                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2032 │   │   │   </span>                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2033 │   │   │   </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.control = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.callback_handler.on_epoch_end(args, <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.state, <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.con  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>2034 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2035 │   │   │   </span>                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2036 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> DebugOption.TPU_METRICS_DEBUG <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">in</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.args.debug:                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2037 │   │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> is_torch_tpu_available():                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">trainer.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">2300</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_maybe_log_save_evaluate</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2297 │   │   │   │   │   </span>)                                                                     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2298 │   │   │   │   │   </span>metrics.update(dataset_metrics)                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2299 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>2300 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   │   </span>metrics = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.evaluate(ignore_keys=ignore_keys_for_eval)                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2301 │   │   │   </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._report_to_hp_search(trial, <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.state.global_step, metrics)             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2302 │   │   │   </span>                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2303 │   │   │   # Run delayed LR scheduler now that metrics are populated</span>                     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">trainer_seq2seq.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">159</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">evaluate</span>          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">156 │   │   </span>)                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">157 │   │   </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._gen_kwargs = gen_kwargs                                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">158 │   │   </span>                                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>159 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">super</span>().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">160 │   </span>                                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">161 │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">def</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">predict</span>(                                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">162 │   │   </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>,                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">trainer.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">3029</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">evaluate</span>                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3026 │   │   </span>start_time = time.time()                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3027 │   │   </span>                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3028 │   │   </span>eval_loop = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.prediction_loop <span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.args.use_legacy_prediction_loop <span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">se</span>  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>3029 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span>output = eval_loop(                                                               <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3030 │   │   │   </span>eval_dataloader,                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3031 │   │   │   </span>description=<span style=\"color: #808000; text-decoration-color: #808000\">\"Evaluation\"</span>,                                                     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3032 │   │   │   # No point gathering the predictions if there are no metrics, otherwise we d</span>  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">trainer.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">3210</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">evaluation_loop</span>          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3207 │   │   │   │   │   </span>batch_size = observed_batch_size                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3208 │   │   │   </span>                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3209 │   │   │   # Prediction step</span>                                                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>3210 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span>loss, logits, labels = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.prediction_step(model, inputs, prediction_loss_o  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3211 │   │   │   </span>inputs_decode = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._prepare_input(inputs[<span style=\"color: #808000; text-decoration-color: #808000\">\"input_ids\"</span>]) <span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> args.include_inp  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3212 │   │   │   </span>                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3213 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> is_torch_tpu_available():                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">trainer_seq2seq.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">247</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">prediction_step</span>   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">244 </span><span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">│   │   </span><span style=\"color: #808000; text-decoration-color: #808000\">\"\"\"</span>                                                                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">245 │   │   </span>                                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">246 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.args.predict_with_generate <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> prediction_loss_only:                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>247 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">super</span>().prediction_step(                                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">248 │   │   │   │   </span>model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ig   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">249 │   │   │   </span>)                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">250 </span>                                                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">trainer.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">3466</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">prediction_step</span>          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3463 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3464 │   │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> has_labels <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> loss_without_labels:                                     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3465 │   │   │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">with</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.compute_loss_context_manager():                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>3466 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   │   │   │   </span>loss, outputs = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.compute_loss(model, inputs, return_outputs=  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3467 │   │   │   │   │   </span>loss = loss.mean().detach()                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3468 │   │   │   │   │   </span>                                                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3469 │   │   │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">isinstance</span>(outputs, <span style=\"color: #00ffff; text-decoration-color: #00ffff\">dict</span>):                                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">trainer.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">2767</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">compute_loss</span>             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2764 │   │   │   </span>labels = inputs.pop(<span style=\"color: #808000; text-decoration-color: #808000\">\"labels\"</span>)                                                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2765 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2766 │   │   │   </span>labels = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span>                                                                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>2767 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span>outputs = model(**inputs)                                                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2768 │   │   # Save past state if it exists</span>                                                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2769 │   │   # TODO: this needs to be fixed and made cleaner later.</span>                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2770 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.args.past_index &gt;= <span style=\"color: #0000ff; text-decoration-color: #0000ff\">0</span>:                                                     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/nn/modules/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">module.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1501</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_call_impl</span>            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1498 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> (<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._forward_hooks   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1499 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_hooks                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1500 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_pre_hooks):                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1501 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> forward_call(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1502 │   │   # Do not call functions when jit is used</span>                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1503 │   │   </span>full_backward_hooks, non_full_backward_hooks = [], []                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1504 │   │   </span>backward_pre_hooks = []                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/accelerate/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">hooks.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">165</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">new_forward</span>                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">162 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">with</span> torch.no_grad():                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">163 │   │   │   │   </span>output = old_forward(*args, **kwargs)                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">164 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>165 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span>output = old_forward(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">166 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> module._hf_hook.post_forward(module, output)                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">167 │   </span>                                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">168 │   </span>module.forward = new_forward                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/models/t5/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">modeling_t5.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1716</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">forward</span>    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1713 │   │   │   │   </span>decoder_attention_mask = decoder_attention_mask.to(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.decoder.first_de  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1714 │   │   </span>                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1715 │   │   # Decode</span>                                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1716 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span>decoder_outputs = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.decoder(                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1717 │   │   │   </span>input_ids=decoder_input_ids,                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1718 │   │   │   </span>attention_mask=decoder_attention_mask,                                        <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1719 │   │   │   </span>inputs_embeds=decoder_inputs_embeds,                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/nn/modules/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">module.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1501</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_call_impl</span>            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1498 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> (<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._forward_hooks   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1499 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_hooks                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1500 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_pre_hooks):                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1501 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> forward_call(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1502 │   │   # Do not call functions when jit is used</span>                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1503 │   │   </span>full_backward_hooks, non_full_backward_hooks = [], []                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1504 │   │   </span>backward_pre_hooks = []                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/accelerate/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">hooks.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">165</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">new_forward</span>                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">162 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">with</span> torch.no_grad():                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">163 │   │   │   │   </span>output = old_forward(*args, **kwargs)                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">164 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>165 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span>output = old_forward(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">166 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> module._hf_hook.post_forward(module, output)                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">167 │   </span>                                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">168 │   </span>module.forward = new_forward                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/models/t5/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">modeling_t5.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1086</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">forward</span>    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1083 │   │   │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span>,  <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"># past_key_value is always None with gradient checkpointing</span>    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1084 │   │   │   │   </span>)                                                                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1085 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1086 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   │   </span>layer_outputs = layer_module(                                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1087 │   │   │   │   │   </span>hidden_states,                                                        <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1088 │   │   │   │   │   </span>attention_mask=extended_attention_mask,                               <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1089 │   │   │   │   │   </span>position_bias=position_bias,                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/nn/modules/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">module.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1501</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_call_impl</span>            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1498 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> (<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._forward_hooks   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1499 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_hooks                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1500 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_pre_hooks):                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1501 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> forward_call(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1502 │   │   # Do not call functions when jit is used</span>                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1503 │   │   </span>full_backward_hooks, non_full_backward_hooks = [], []                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1504 │   │   </span>backward_pre_hooks = []                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/accelerate/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">hooks.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">165</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">new_forward</span>                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">162 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">with</span> torch.no_grad():                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">163 │   │   │   │   </span>output = old_forward(*args, **kwargs)                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">164 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>165 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span>output = old_forward(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">166 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> module._hf_hook.post_forward(module, output)                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">167 │   </span>                                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">168 │   </span>module.forward = new_forward                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/models/t5/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">modeling_t5.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">693</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">forward</span>     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 690 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 691 │   │   │   </span>self_attn_past_key_value, cross_attn_past_key_value = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span>, <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span>              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 692 │   │   </span>                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span> 693 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span>self_attention_outputs = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.layer[<span style=\"color: #0000ff; text-decoration-color: #0000ff\">0</span>](                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 694 │   │   │   </span>hidden_states,                                                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 695 │   │   │   </span>attention_mask=attention_mask,                                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 696 │   │   │   </span>position_bias=position_bias,                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/nn/modules/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">module.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1501</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_call_impl</span>            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1498 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> (<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._forward_hooks   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1499 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_hooks                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1500 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_pre_hooks):                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1501 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> forward_call(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1502 │   │   # Do not call functions when jit is used</span>                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1503 │   │   </span>full_backward_hooks, non_full_backward_hooks = [], []                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1504 │   │   </span>backward_pre_hooks = []                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/accelerate/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">hooks.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">165</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">new_forward</span>                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">162 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">with</span> torch.no_grad():                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">163 │   │   │   │   </span>output = old_forward(*args, **kwargs)                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">164 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>165 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span>output = old_forward(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">166 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> module._hf_hook.post_forward(module, output)                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">167 │   </span>                                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">168 │   </span>module.forward = new_forward                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/models/t5/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">modeling_t5.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">600</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">forward</span>     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 597 │   │   </span>output_attentions=<span style=\"color: #0000ff; text-decoration-color: #0000ff\">False</span>,                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 598 │   </span>):                                                                                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 599 │   │   </span>normed_hidden_states = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.layer_norm(hidden_states)                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span> 600 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span>attention_output = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.SelfAttention(                                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 601 │   │   │   </span>normed_hidden_states,                                                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 602 │   │   │   </span>mask=attention_mask,                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 603 │   │   │   </span>position_bias=position_bias,                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/nn/modules/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">module.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1501</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_call_impl</span>            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1498 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> (<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._forward_hooks   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1499 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_hooks                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1500 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_pre_hooks):                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1501 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> forward_call(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1502 │   │   # Do not call functions when jit is used</span>                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1503 │   │   </span>full_backward_hooks, non_full_backward_hooks = [], []                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1504 │   │   </span>backward_pre_hooks = []                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/accelerate/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">hooks.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">165</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">new_forward</span>                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">162 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">with</span> torch.no_grad():                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">163 │   │   │   │   </span>output = old_forward(*args, **kwargs)                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">164 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>165 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span>output = old_forward(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">166 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> module._hf_hook.post_forward(module, output)                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">167 │   </span>                                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">168 │   </span>module.forward = new_forward                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/models/t5/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">modeling_t5.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">519</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">forward</span>     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 516 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> hidden_states                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 517 │   │   </span>                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 518 │   │   # get query states</span>                                                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span> 519 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span>query_states = shape(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.q(hidden_states))  <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"># (batch_size, n_heads, seq_length,</span>  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 520 │   │   </span>                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 521 │   │   # get key/value states</span>                                                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 522 │   │   </span>key_states = project(                                                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/nn/modules/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">module.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1501</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_call_impl</span>            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1498 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> (<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._forward_hooks   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1499 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_hooks                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1500 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_pre_hooks):                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1501 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> forward_call(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1502 │   │   # Do not call functions when jit is used</span>                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1503 │   │   </span>full_backward_hooks, non_full_backward_hooks = [], []                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1504 │   │   </span>backward_pre_hooks = []                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/accelerate/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">hooks.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">165</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">new_forward</span>                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">162 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">with</span> torch.no_grad():                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">163 │   │   │   │   </span>output = old_forward(*args, **kwargs)                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">164 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>165 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span>output = old_forward(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">166 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> module._hf_hook.post_forward(module, output)                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">167 │   </span>                                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">168 │   </span>module.forward = new_forward                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/bitsandbytes/nn/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">modules.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">320</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">forward</span>                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">317 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.bias <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">is</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">and</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.bias.dtype != x.dtype:                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">318 │   │   │   </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.bias.data = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.bias.data.to(x.dtype)                                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">319 │   │   </span>                                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>320 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span>out = bnb.matmul(x, <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.weight, bias=<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.bias, state=<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.state)                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">321 │   │   </span>                                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">322 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.state.has_fp16_weights:                                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">323 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.state.CB <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">is</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">and</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.state.CxB <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">is</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span>:                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">_functions.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">500</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">matmul</span>        <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">497 │   </span>state = state <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> MatmulLtState()                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">498 │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> threshold &gt; <span style=\"color: #0000ff; text-decoration-color: #0000ff\">0.0</span>:                                                                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">499 │   │   </span>state.threshold = threshold                                                        <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>500 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> MatMul8bitLt.apply(A, B, out, bias, state)                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">501 </span>                                                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/autograd/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">function.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">506</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">apply</span>                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">503 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> torch._C._are_functorch_transforms_active():                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">504 │   │   │   # See NOTE: [functorch vjp and autograd interaction]</span>                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">505 │   │   │   </span>args = _functorch.utils.unwrap_dead_wrappers(args)                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>506 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">super</span>().apply(*args, **kwargs)  <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"># type: ignore[misc]</span>                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">507 │   │   </span>                                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">508 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">cls</span>.setup_context == _SingleLevelFunction.setup_context:                        <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">509 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">raise</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">RuntimeError</span>(                                                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">_functions.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">397</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">forward</span>       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">394 │   │   # 3. Matmul</span>                                                                        <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">395 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> using_igemmlt:                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">396 │   │   │   </span>C32A, SA = F.transform(CA, <span style=\"color: #808000; text-decoration-color: #808000\">\"col32\"</span>)                                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>397 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span>out32, Sout32 = F.igemmlt(C32A, state.CxB, SA, state.SB)                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">398 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> bias <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">is</span> <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> bias.dtype == torch.float16:                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">399 │   │   │   │   # we apply the fused bias here</span>                                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">400 │   │   │   │   </span>output = F.mm_dequant(out32, Sout32, SCA, state.SCB, bias=bias)            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/bitsandbytes/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">functional.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1391</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">igemmlt</span>               <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1388 │   </span>prev_device = A.device                                                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1389 │   </span>torch.cuda.set_device(A.device)                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1390 │   </span>                                                                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1391 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   </span>ptr = CUBLAS_Context.get_instance().get_context(A.device)                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1392 │   </span>ptrA = get_ptr(A)                                                                     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1393 │   </span>ptrB = get_ptr(B)                                                                     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1394 │   </span>ptrC = get_ptr(out)                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/bitsandbytes/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">functional.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">122</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">get_context</span>            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 119 │   │   │   </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">cls</span>._instance.initialize()                                                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 120 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">cls</span>._instance                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 121 │   </span>                                                                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span> 122 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">def</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">get_context</span>(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>, device):                                                        <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 123 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> device.index <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">in</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.context:                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 124 │   │   │   </span>prev_device = torch.cuda.current_device()                                     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 125 │   │   │   </span>torch.cuda.set_device(device)                                                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n","<span style=\"color: #800000; text-decoration-color: #800000\">╰──────────────────────────────────────────────────────────────────────────────────────────────────╯</span>\n","<span style=\"color: #ff0000; text-decoration-color: #ff0000; font-weight: bold\">KeyboardInterrupt</span>\n","</pre>\n"]},"metadata":{}}]},{"cell_type":"code","source":["eval_dataset"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"vZIH96iCh__6","executionInfo":{"status":"ok","timestamp":1684441405429,"user_tz":-180,"elapsed":9,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"8cf733f2-9781-423f-da1c-7cb35a800562"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Dataset({\n","    features: ['input_ids', 'attention_mask', 'labels'],\n","    num_rows: 150\n","})"]},"metadata":{},"execution_count":47}]},{"cell_type":"code","source":["import datasets\n","text_column = \"text\"\n","label_column = \"label\"\n","max_length = 512\n","max_target_len=32\n","\n","processed_datasets = snli_data2.map(\n","    preprocess_function,\n","    batched=True,\n","    num_proc=1,\n","    #remove_columns=snli_data2[\"train\"].column_names,\n","    load_from_cache_file=False,\n","  )\n","\n","train_dataset = processed_datasets[\"train\"]\n","eval_dataset = processed_datasets[\"validation\"]"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":17,"referenced_widgets":["481107966aab4156a59431683e0df7b6","dbe1cb37650b484a9fcfaa348ab50a43","9495e17f703b4215b59ded17a7b9ab74","25baacb633ed43aca8d4a8c8e4a9916b","e723c6f2ed854b40804ac5ab4b65f1d7","3673056c227f4073bf4344a7a01a7ed6","81f8775d6464467e88c63e8a21a31e80","74fcc9d14c1d41a48b20176a52cc13ca","242eaeab548e4dc5a2c03950e604e627","43295c865e11477384102d802e8976e5","e7b5570d2ee94b318723c26916e9e9ec","26878d2d98844aae9fe193e6657a1356","0b27c869b2fd4c7697c3b538b3572d47","88e27d88192e42879e1ccaf5374cabf3","cb6e7d847c9047d1ba62630250c4073e","d299b2d31cc04fa4aec1755ae6cc1f75","daa952c83ff643fba859e9f4f6f7ccc6","09e144fa726c4720b28048395e06f2a1","64881cdc4f254b96b87d1116bb571f46","3cc12c445d0e48488f619d84e9f5d135","d27a2d3470b64cf9b3229db4d4360ba2","ec6114f0321a4f43a31dbe9f9c0a42b9"]},"id":"BrvY15AIiT7I","executionInfo":{"status":"ok","timestamp":1684441490707,"user_tz":-180,"elapsed":12,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"68265859-6828-4104-c195-ec6b63e4d24a"},"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/plain":["Map:   0%|          | 0/400 [00:00<?, ? examples/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"481107966aab4156a59431683e0df7b6"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Map:   0%|          | 0/150 [00:00<?, ? examples/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"26878d2d98844aae9fe193e6657a1356"}},"metadata":{}}]},{"cell_type":"code","source":["eval_dataset"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"SyYaRvxJiW9h","executionInfo":{"status":"ok","timestamp":1684441499464,"user_tz":-180,"elapsed":10,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"bf8d0efc-3124-420a-f16a-a8b0ae870717"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Dataset({\n","    features: ['premise', 'hypothesis', 'label', 'text', '__index_level_0__', 'input_ids', 'attention_mask', 'labels'],\n","    num_rows: 150\n","})"]},"metadata":{},"execution_count":49}]},{"cell_type":"code","source":["text_column"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":36},"id":"oDeROphZiZuZ","executionInfo":{"status":"ok","timestamp":1684441516629,"user_tz":-180,"elapsed":4,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"5b0f4881-3036-40be-af08-5b91c2f4f4da"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["'text'"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"}},"metadata":{},"execution_count":51}]},{"cell_type":"code","source":["model.eval()\n","i = 13\n","inputs = tokenizer(eval_dataset[text_column][i], return_tensors=\"pt\")\n","print(eval_dataset[text_column][i])\n","print(eval_dataset[label_column][i])\n","\n","#print(inputs)\n","\n","with torch.no_grad():\n","    outputs = model.generate(input_ids=inputs[\"input_ids\"], max_new_tokens=10)\n","    print(outputs)\n","    print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True))"],"metadata":{"id":"-_vmCshEN3TK","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1684441599469,"user_tz":-180,"elapsed":1787,"user":{"displayName":"Savas Yıldırım","userId":"10717726124681851716"}},"outputId":"4b9455cc-d204-4f14-892a-1e8d559ba8df"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["S1:Two young men and a woman are standing at the back of a blue pickup truck sorting through vegetables with smiles on their faces.S2:Two young men and a woman are looking at vegetables and smiling. S1 and S2 are labeled as entailment, neutral or contradiction\n","They are entailment\n","tensor([[   0,  328,   33, 7163,    1]])\n","['They are neutral']\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"F1Uu2qVfh0lY"},"execution_count":null,"outputs":[]}]}